#include "gb.h"
#include <stdio.h>

// Selects which tile layer gb_gpu_writeTileLine() renders
enum tileType
{
	BG  = 0,	// Background layer
	WIN = 1		// Window layer
};

// Forward declarations of the scanline renderers defined later in this file.
// Writes next line to screen buffer (background, window and sprites,
//  depending on the LCDC enable bits)
void gb_gpu_writeLine(gb_state *gb);
// Writes the current line of the background (BG) or window (WIN) tile layer
void gb_gpu_writeTileLine(gb_state *gb, enum tileType tt);
// Writes the pixels of every sprite (OBJ) that intersects the current line
void gb_gpu_writeOBJLine(gb_state *gb);
// Fills the current line with white pixels
void gb_gpu_writeWhiteLine(gb_state *gb);

// Compares LY against LYC and updates the coincidence flag accordingly.
// STAT bit 2 = Coincidence Flag (0:LYC<>LY, 1:LYC=LY). When the lines match
//  and STAT bit 6 is set, a STAT interrupt (IF bit 1) is requested.
static void gb_gpu_updateCoincidence(gb_state *gb)
{
	if(gb->gpu_state.LY == gb->gpu_state.LYC)
	{
		gb->gpu_state.STAT |= 0x04;
		// Request STAT interrupt if STAT bit 6 is set
		if(gb->gpu_state.STAT & 0x40)
			gb->IF |= 0x02;
	}
	else
	{
		// Set bit 2 to zero
		gb->gpu_state.STAT &= 0xfb;
	}
}

// Advances the LCD controller state machine by `cycles` clock cycles.
//
// The current mode is kept in the low two bits of STAT:
//   0 = H-Blank (204 cycles), 1 = V-Blank (456 cycles per line),
//   2 = searching OAM (80 cycles), 3 = transferring data to LCD (172 cycles)
// At most one mode transition is performed per call. On a transition the
//  accumulated cycle counter is reset to zero, LY and the coincidence flag
//  are updated where applicable, and the matching interrupts are requested
//  in gb->IF (bit 0 = V-Blank, bit 1 = STAT).
// NOTE(review): cycles accumulated beyond a mode's threshold are discarded
//  rather than carried into the next mode.
void gb_gpu_update(gb_state *gb, int cycles)
{
	gb->gpu_state.cycles += cycles;
	switch(gb->gpu_state.STAT & 0x03)   // Last two bits = gpu mode
	{
		// H-Blank
		case 0:
			if(gb->gpu_state.cycles >= 204)
			{
				gb->gpu_state.cycles = 0;
				gb->gpu_state.LY++;
				gb_gpu_updateCoincidence(gb);

				if(gb->gpu_state.LY == 144)
				{
					// Set mode = 1 (mode bits are 00 here)
					gb->gpu_state.STAT |= 0x01;
					gb->gpu_state.frame_ready = 1;
					// Request V-Blank interrupt
					gb->IF |= 0x01;
					// Request STAT interrupt if STAT bit 4 is set
					if(gb->gpu_state.STAT & 0x10)
						gb->IF |= 0x02;
				}
				else
				{
					// Set mode = 2 (mode bits are 00 here)
					gb->gpu_state.STAT |= 0x02;
					// Request STAT interrupt if STAT bit 5 is set, the
					//  same way it is done when leaving V-Blank
					if(gb->gpu_state.STAT & 0x20)
						gb->IF |= 0x02;
				}
			}
			break;
		// V-Blank
		case 1:
			if(gb->gpu_state.cycles >= 456)
			{
				gb->gpu_state.cycles = 0;
				gb->gpu_state.LY++;
				if(gb->gpu_state.LY > 153)
				{
					// Wrap to the first line BEFORE comparing with LYC,
					//  otherwise LYC = 0 could never match
					gb->gpu_state.LY = 0;
					gb_gpu_updateCoincidence(gb);
					// Set mode = 2 (01 ^ 11 = 10)
					gb->gpu_state.STAT ^= 0x03;
					// Request STAT interrupt if STAT bit 5 is set
					if(gb->gpu_state.STAT & 0x20)
						gb->IF |= 0x02;
				}
				else
				{
					gb_gpu_updateCoincidence(gb);
				}
			}
			break;
		// Searching OAM-RAM
		case 2:
			if(gb->gpu_state.cycles >= 80)
			{
				// Set mode = 3 (10 | 01 = 11)
				gb->gpu_state.STAT |= 0x01;
				gb->gpu_state.cycles = 0;
			}
			break;
		// Transferring data to LCD
		case 3:
			if(gb->gpu_state.cycles >= 172)
			{
				// Set mode = 0 (11 ^ 11 = 00)
				gb->gpu_state.STAT ^= 0x03;
				gb->gpu_state.cycles = 0;
				// The line is rendered in one go at the end of the transfer
				gb_gpu_writeLine(gb);
				// Request STAT interrupt if STAT bit 3 is set
				if(gb->gpu_state.STAT & 0x08)
					gb->IF |= 0x02;
			}
			break;
	}
	return;
}

// Renders the current line (LY) into the screen buffer.
// Layers are drawn back to front: background (or a white line when the
//  background is disabled), then the window, then the sprites. Nothing is
//  drawn while the LCD is disabled.
void gb_gpu_writeLine(gb_state *gb)
{
	// LCDC bit 7 = LCD Display Enable
	if(!(gb->gpu_state.LCDC & 0x80))
		return;

	// LCDC bit 0 = BG Display; a disabled background shows as white
	if(gb->gpu_state.LCDC & 0x01)
		gb_gpu_writeTileLine(gb, BG);
	else
		gb_gpu_writeWhiteLine(gb);

	// LCDC bit 5 = Window Display Enable.
	// WX holds the window X position plus 7, WY the window Y position:
	//  the window is drawn only if it starts on screen horizontally and
	//  the current line is at or below its first line.
	if((gb->gpu_state.LCDC & 0x20) &&
			(gb->gpu_state.WX-7 < PIXELX) &&
			(gb->gpu_state.WY <= gb->gpu_state.LY))
		gb_gpu_writeTileLine(gb, WIN);

	// LCDC bit 1 = OBJ Display
	if(gb->gpu_state.LCDC & 0x02)
		gb_gpu_writeOBJLine(gb);
}

// Renders the current line (LY) of one tile layer into the screen buffer.
//
// tt selects the layer: BG uses the scroll registers SCX/SCY and starts at
//  screen column 0; WIN is not scrolled and starts at column WX-7 (or 0 if
//  WX is below 7). Any other value draws nothing.
// The screen buffer uses 4 bytes per pixel, of which only the first three
//  are written. Each gameboy pixel is scaled up to a block of
//  fillX x fillY buffer pixels according to the ratio between
//  screenw/screenh and the native PIXELX/PIXELY resolution.
void gb_gpu_writeTileLine(gb_state *gb, enum tileType tt)
{
	// Shade written for each 2-bit palette color:
	//  0 = white, 1 = light gray, 2 = dark gray, 3 = black
	static const unsigned char shades[4] = { 0xff, 0xc0, 0x60, 0x00 };
	// Up to 21 tiles can be used in a line
	unsigned char tileIndices[21];
	int y = gb->gpu_state.LY;
	int tileMapAddress;
	int tileY;
	int tileMapRowBegin;
	int tileMapRowEnd;
	int tileDataStart;
	int tileNum;
	int tileRow;
	int tileDataAddress;
	int pixelNum;
	unsigned char b1, b2;
	unsigned char shade;
	int colorNumMask, colorNum, bufferOffset, colorMask, color;
	int pixelX;
	unsigned char SCX, SCY;

	// How many pixels across and down should be filled by one
	//  gameboy pixel, depending on the scaling factor
	//  between the gameboy native resolution and the
	//  emulator resolution
	int fillX = 0, fillY = 0;
	int filledX = 0;
	int i, j;

	// Compute which tile map is being used
	// Bit 3 = BG tile map display select
	// Bit 6 = Window tile map display select
	switch(tt)
	{
		case BG:
			tileMapAddress = gb->gpu_state.LCDC & 0x08 ?
				VRAM_START + TILEMAP1 : VRAM_START + TILEMAP0;
			SCX = gb->gpu_state.SCX;
			SCY = gb->gpu_state.SCY;
			// Compute row of the tile map
			tileY = ((y + SCY) & 0xff) >> 3;
			// Compute row of the tile
			tileRow = (y + SCY) & 0x07;
			// Start column
			pixelX = 0;
			break;
		case WIN:
			tileMapAddress = gb->gpu_state.LCDC & 0x40 ?
				VRAM_START + TILEMAP1 : VRAM_START + TILEMAP0;
			// Window is not scrollable
			SCX = 0;
			SCY = 0;
			// Compute row of the tile map
			tileY = ((y - gb->gpu_state.WY) & 0xff) >> 3;
			// Compute row of the 8x8 pixel tile
			tileRow = (y - gb->gpu_state.WY) & 0x07;
			// Start column. WX should be between 7 and 166,
			//  but I have seen it set lower
			if(gb->gpu_state.WX >= 7)
				pixelX = gb->gpu_state.WX-7;
			else
				pixelX = 0;
			break;
		default:
			// Unknown layer: leave the buffer untouched instead of
			//  working with uninitialised values
			return;
	}
	// Used to know when to wrap screen back
	//  to beginning of row
	tileMapRowBegin = tileY * 32 + tileMapAddress;
	tileMapRowEnd = (tileY+1) * 32 + tileMapAddress;
	// Row tileY times 32 bytes per row plus X offset
	tileMapAddress += (tileY * 32) + (SCX >> 3);
	for(tileNum=0; tileNum < 21; tileNum++)
	{
		tileIndices[tileNum] = gb_read_mem(gb, tileMapAddress++);
		if(tileMapAddress == tileMapRowEnd)
		{
			tileMapAddress = tileMapRowBegin;
		}
	}

	// Compute which tile pattern table is being used
	// Bit 4 = BG & Window Tile Data Select
	tileDataStart = gb->gpu_state.LCDC & 0x10 ?
			VRAM_START + TILE1 : VRAM_START + TILE0;
	// Tile pattern table 0 numbers tiles from -128 to 127
	// If this table is being used, an unsigned offset
	//  needs to be calculated
	if(tileDataStart == VRAM_START + TILE0)
	{
		for(tileNum=0; tileNum < 21; tileNum++)
		{
			tileIndices[tileNum] -= 128;
			tileIndices[tileNum] &= REG_MAX;
		}
	}

	tileNum = 0;
	// Start of data plus first tile index times 16 bytes per tile
	//  plus tileRow times 2 bytes per row
	tileDataAddress = tileDataStart + (tileIndices[tileNum] * 16) +
			(tileRow * 2);
	// Compute start pixel
	pixelNum = SCX & 0x07;
	// Get row of pixels
	b1 = gb_read_mem(gb, tileDataAddress);
	b2 = gb_read_mem(gb, tileDataAddress+1);
	// Compute bit mask for pixel color number
	colorNumMask = 1 << (7-pixelNum);
	// Buffer columns already accounted for to the left of the start
	//  column. This must match the X part of bufferOffset below: starting
	//  from 0 would make the first window pixel as wide as everything to
	//  its left and push the rest of the line past the end of the row.
	filledX = gb->gpu_state.screenw * pixelX / PIXELX;
	// How many rows have already been filled times 4 bytes per
	//  pixel times screenw pixels per row plus X offset times 4
	//  bytes per pixel
	bufferOffset = (gb->gpu_state.screenh*y/PIXELY) * 4 *
			gb->gpu_state.screenw +
			filledX * 4;
	// Do not factor this! This is integer division and will
	//  result in a different, incorrect value if factored
	fillY = (gb->gpu_state.screenh * (y+1) / PIXELY) -
			(gb->gpu_state.screenh * y / PIXELY);
	// Compute each pixel in the line and send to buffer
	for(; pixelX < PIXELX; pixelX++)
	{
		// Do not confuse color number with color: the color number
		//  indexes the BGP palette, which yields the actual color
		colorNum = ((b1 & colorNumMask) >> (7-pixelNum)) +
				(b2 & colorNumMask ? 2 : 0);
		colorMask = 0x03 << (colorNum << 1);
		color = (gb->gpu_state.BGP & colorMask) >> (colorNum << 1);
		shade = shades[color];

		fillX = (gb->gpu_state.screenw * (pixelX+1) / PIXELX) - filledX;
		filledX += fillX;
		// Fill the fillX x fillY block that represents this pixel;
		//  the fourth byte of every buffer pixel is left as it is
		for(j=bufferOffset;
				j < bufferOffset+(fillY*4*gb->gpu_state.screenw);
				j += (4*gb->gpu_state.screenw))
		{
			for(i=j; i < j+(4*fillX); i += 4)
			{
				(gb->gpu_state.screen)[i]    = shade;
				(gb->gpu_state.screen)[i+1]  = shade;
				(gb->gpu_state.screen)[i+2]  = shade;
			}
		}
		bufferOffset += fillX*4;

		colorNumMask >>= 1;
		pixelNum++;
		if(pixelNum == 8)
		{
			pixelNum = 0;
			colorNumMask = 0x80;
			// Get next tile
			tileNum++;
			tileDataAddress = tileDataStart + (tileIndices[tileNum] * 16) +
					(tileRow * 2);
			b1 = gb_read_mem(gb, tileDataAddress);
			b2 = gb_read_mem(gb, tileDataAddress+1);
		}
	}
	return;
}

// Renders the sprites (OBJ) that intersect the current line (LY) into the
//  screen buffer.
//
// Every 4-byte OAM entry is visited in order: byte 0 = Y position + 16,
//  byte 1 = X position + 8, byte 2 = tile number, byte 3 = attributes
//  (bit 7 = OBJ-BG priority, bit 6 = Y flip, bit 5 = X flip,
//  bit 4 = palette number). Color number 0 is never drawn. Each gameboy
//  pixel is scaled to a fillX x fillY block of buffer pixels; the buffer
//  uses 4 bytes per pixel, of which only the first three are written.
void gb_gpu_writeOBJLine(gb_state *gb)
{
	// Shade written for each 2-bit palette color:
	//  0 = white, 1 = light gray, 2 = dark gray, 3 = black
	static const unsigned char shades[4] = { 0xff, 0xc0, 0x60, 0x00 };
	// Bit 2 = OBJ size (0=8x8, 1=8x16)
	int spriteHeight = gb->gpu_state.LCDC & 0x04 ? 16 : 8;

	int y = gb->gpu_state.LY;
	int tileRow;
	int startPixel, endPixel;
	int tileDataStart = VRAM_START + TILE1;
	int tileDataAddress;
	// Values defining each sprite taken from OAM
	int spriteY, spriteX, tileNum, attributes;

	int palette;
	int b1, b2;
	// Only used for X flip
	int newb1, newb2, p;

	int bufferOffset;
	int colorNum;
	int colorNumMask;
	int color;
	int colorMask;
	int objOffset;
	int pixelNum;
	unsigned char shade;

	int fillX = 0, fillY = 0;
	int i, j;

	for(objOffset=OAM_START;
			objOffset < OAM_START+OAM_SIZE;
			objOffset += 4)
	{
		// Byte0 = Y position + 16
		spriteY = gb_read_mem(gb, objOffset)-16;
		// Check if sprite intersects current line
		if((spriteY > y) ||
				((spriteY + spriteHeight) <= y))
			continue;
		// Which line of the sprite to draw
		tileRow = y - spriteY;
		// Byte1 = X position + 8
		spriteX = gb_read_mem(gb, objOffset+1)-8;
		// Check if sprite is on screen
		if(((spriteX+8) <= 0) ||
				(spriteX >= PIXELX))
			continue;
		// Check if sprite is partially hidden on the left edge
		if(spriteX < 0)
		{
			startPixel = -spriteX;
			spriteX = 0;
		}
		else
		{
			startPixel = 0;
		}
		// Check if sprite is partially hidden on the right edge
		if(spriteX > (PIXELX-8))
		{
			endPixel = PIXELX - spriteX - 1;
		}
		else
		{
			endPixel = 7;
		}
		// Byte2 = tile number
		tileNum = gb_read_mem(gb, objOffset+2);
		// In 8x16 mode the least significant bit of the tile number is
		//  ignored: the sprite always uses an even/odd pair of tiles
		if(spriteHeight == 16)
		{
			tileNum &= 0xfe;
		}
		// Byte3 = attributes
		attributes = gb_read_mem(gb, objOffset+3);
		// Bit 6 = Y flip
		if(attributes & 0x40)
		{
			tileRow = (spriteHeight-1) - tileRow;
		}
		// 16 bytes per tile, 2 bytes per row. For 8x16 sprites rows
		//  8..15 fall into the following tile.
		tileDataAddress = tileDataStart + (tileNum * 16) +
				(tileRow * 2);
		// Bit 4 = palette number
		// Palette is ANDed with 0xfc because the lower two
		//  bits are not used
		palette = attributes & 0x10 ? gb->gpu_state.OBP1 & 0xfc :
				gb->gpu_state.OBP0 & 0xfc;
		// Line of pixels
		b1 = gb_read_mem(gb, tileDataAddress);
		b2 = gb_read_mem(gb, tileDataAddress+1);
		// Bit 5 = X flip
		if(attributes & 0x20)
		{
			newb1 = newb2 = 0;
			// Mirror b1 and b2: move bits out of the low end of the
			//  source into the low end of the result, shifting the
			//  result left after each of the first seven bits
			for(p=0; p < 7; p++)
			{
				newb1 |= (b1 & 0x01);
				newb2 |= (b2 & 0x01);
				b1 >>= 1;
				b2 >>= 1;
				newb1 <<= 1;
				newb2 <<= 1;
			}
			newb1 |= (b1 & 0x01);
			newb2 |= (b2 & 0x01);
			b1 = newb1;
			b2 = newb2;
		}
		colorNumMask = 0x80 >> startPixel;
		// Do not factor this! Integer division would give a different
		//  value if factored
		fillY = (gb->gpu_state.screenh * (y+1) / PIXELY) -
				(gb->gpu_state.screenh * y / PIXELY);
		for(pixelNum=startPixel;
				pixelNum <= endPixel;
				pixelNum++, colorNumMask >>= 1, spriteX++)
		{
			bufferOffset = (gb->gpu_state.screenh*y/PIXELY) * 4 *
					gb->gpu_state.screenw +
					(gb->gpu_state.screenw*spriteX/PIXELX) * 4;
			colorNum = ((b1 & colorNumMask) >> (7-pixelNum)) +
					(b2 & colorNumMask ? 2 : 0);
			// Sprites don't display color number 0
			if(colorNum == 0x00)
				continue;
			colorMask = 0x03 << (colorNum << 1);
			color = (palette & colorMask) >> (colorNum << 1);
			// Bit 7 = OBJ-BG priority. When it is set the sprite pixel
			//  is drawn only where the buffer currently holds 0xff
			//  (white) in its first byte
			if(!(attributes & 0x80) ||
					(gb->gpu_state.screen[bufferOffset] == 0xff))
			{
				fillX = (gb->gpu_state.screenw * (spriteX+1) / PIXELX) -
						(gb->gpu_state.screenw * spriteX / PIXELX);
				shade = shades[color];
				// Fill the fillX x fillY block for this pixel; the
				//  fourth byte of every buffer pixel is left as it is
				for(j=bufferOffset;
						j < bufferOffset+(fillY*4*gb->gpu_state.screenw);
						j += (4*gb->gpu_state.screenw))
				{
					for(i=j; i < j+(4*fillX); i += 4)
					{
						(gb->gpu_state.screen)[i]    = shade;
						(gb->gpu_state.screen)[i+1]  = shade;
						(gb->gpu_state.screen)[i+2]  = shade;
					}
				}
			}
		}
	}
	return;
}

// Paints the current line (LY) white across the full buffer width.
// One gameboy line covers `rows` buffer rows, depending on the ratio
//  between screenh and PIXELY. Only the first three bytes of each
//  4-byte buffer pixel are written.
void gb_gpu_writeWhiteLine(gb_state *gb)
{
	int line = gb->gpu_state.LY;
	// Byte offset of the first buffer row that belongs to this line
	int lineStart = (gb->gpu_state.screenh*line/PIXELY) * 4 *
			gb->gpu_state.screenw;
	// Kept as two separate integer divisions on purpose
	int rows = (gb->gpu_state.screenh * (line+1) / PIXELY) -
			(gb->gpu_state.screenh * line / PIXELY);
	int row, col, px;

	for(row = 0; row < rows; row++)
	{
		for(col = 0; col < gb->gpu_state.screenw; col++)
		{
			px = lineStart + 4 * (row * gb->gpu_state.screenw + col);
			gb->gpu_state.screen[px]     = 0xff;
			gb->gpu_state.screen[px + 1] = 0xff;
			gb->gpu_state.screen[px + 2] = 0xff;
		}
	}
}
